************** Spatial Regressions ******************
* spatial transformation completed previously in spgen_4_* 

************************************
* Contents
		* Radiant based: section 2 horizontal, 6 vertical, counter-clockwise.
		* 4950 cells based on k_bins of width 3 kWh 
		* Outcomes: sales and count, each in level, log+1, and ihs transformation
		* standard errors clustered by cell   
************************************

************************************
* Variables
		* cm: country-month items (second level identifier)
		* cell: 1-50 are bunchers at window = 2. (primary identifier)
		* a_bin/e_bin: individual attributes, unstandardized 
		* csd  / ccd: sales / product count in cell. 
		
* ---- Session setup --------------------------------------------------
* Close a log possibly left open by an earlier run (capture ignores the
* error when none is open), empty memory, and disable the -more- pager
* so the do-file runs without pausing.
capture log close
clear all
set more off
* Optional one-off steps, kept for reference:
*   set maxvar 30000, permanently   (required to load matrix)
*   ssc install reghdfe
*   ssc install ftools

* ---- Directory globals ----------------------------------------------
* server : project root on the network drive
* store  : output folder for this run (sub-folder of server)
* desktop: local copy of the merged data
global server  "R:\WSV2\TBu_AKe\Spatial_NEW"
global store   "${server}\Wmat_SCB_XVal"
global desktop "C:\Users\hy65byfe\Desktop\smerge_0712"

* Create the output folder relative to the project root; capture swallows
* the error raised when the folder already exists.
cd "${server}"
capture mkdir Wmat_SCB_XVal
*cd "$desktop"

* Open the log for the regression part inside the output folder.
cd "${store}"
log using spatial_6_regs_s_1602_xval, replace
 

			***************************
			* run regression: OLS, fe *
			***************************	
			
* Load the monthly regression sample from the local desktop copy
* (the network-drive location is kept below as a commented alternative).
cd "C:\Users\hy65byfe\Desktop\smerge_0712\SCB_4950"	
*cd "R:\WSV2\TBu_AKe\Spatial_NEW\SCB_4950"	
* -clear- is the documented option of -use- for discarding the data in memory
use spatial_regs_s_0611_monthly, clear

*cd "C:\Users\hy65byfe\Desktop\smerge_0712\SCB_4950"			
*use spatial_regs_s_monthly_0102_overlap, replace  
			
*preserve
*spmatrix dir 
* Restrict the sample to 2014; the spatial matrices built in the previous
* step only match this sample, so do not change the filter.
keep if year == 2014 // no changes. otherwise matrices are wrong in previous. 	
	
	
			***************************
			***** Single Matrix *****
			***************************	
		

** Create log(1+x) and inverse-hyperbolic-sine (ihs) versions of the untransformed series **
** ihs(x) = ln( x + (x^2 + 1)^0.5 ), built in two steps: first the argument, then its log **

		*** outcomes and polar cases (not indexed by direction)
foreach v of varlist csd L3_sd ccd L3_cd {
	generate log_`v' = ln(1 + `v')
	generate ihs_`v' = `v' + (`v'^2 + 1)^0.5
	quietly replace ihs_`v' = ln(ihs_`v')
}


		*** spatial variables: matrices S, C, B for each of the 8 directions ***
forvalues dir = 1/8 {

	* Assemble the six series of this direction in the order
	* S/C/B for _sd, then S/C/B for _cd.
	local spatial_vars
	foreach outcome in sd cd {
		foreach shape in S C B {
			local spatial_vars `spatial_vars' `shape'`dir'_L3_`outcome'
		}
	}

	foreach v of varlist `spatial_vars' {
		generate log_`v' = ln(1 + `v')
		generate ihs_`v' = `v' + (`v'^2 + 1)^0.5
		quietly replace ihs_`v' = ln(ihs_`v')
	}
}

			**********************************************
			***** 	Cross-validation over countries  *****
			**********************************************
			
* Recode country 8 to 7 so that the country codes form the gap-free list 1-7
* that the cross-validation loops iterate over (Serbia was 7, SVN was 8).
* NOTE(review): if codes 7 and 8 are both present in the data, this merges
* the two countries into one fold - confirm that code 7 is unused beforehand.
replace ccode = 7 if ccode == 8

* One all-missing holder per direction; the cross-validation loops copy the
* held-out prediction errors into these variables.
forvalues dir = 1/8 {
	generate v_e_`dir' = .
}
		
		******************
		*** Fan (S) ******
		******************

    ** Results for log transformed data **
	
* Leave-one-country-out cross-validation, Fan (S) matrices, log(1+x) data.
* For each held-out country i and direction q: fit the model on all other
* countries, predict for every observation, and copy the prediction error
* of country i into v_e_q.
forvalues i = 1(1)7 {	// loop over countries 
	forvalues q = 1(1)8 { // loop over directions 
	
		reghdfe  log_csd    log_L3_sd 	log_S`q'_L3_sd								if cell <= 50 & year == 2014 & ccode != `i', absorb(cell ccode month) vce(cluster cell) //  excl. country i
		* the stored name does not depend on i, so only the fit of the last held-out country is kept
		estimates store log_S`q'
		* NOTE(review): according to the reghdfe help, -xb- leaves out the absorbed
		* fixed effects, so the error below still contains them - confirm intended.
		predict log_S`q'_`i', xb
		gen err_S`q'_`i' =  log_csd - log_S`q'_`i' 										if cell <= 50 & year == 2014  // note: residuals option in reghdfe is faster, but does not predict out-of-sample 
		replace v_e_`q' = err_S`q'_`i' if ccode == `i' // keep only the held-out country's errors 
		/* check predictions 
		sum err_S`q'_`i' if ccode == 1 // 600
		sum err_S`q'_`i' // 4,200 
		*/ 
	}
}

	** Calculate RMSE **
	
forvalues q = 1(1)8 { 
	gen rmse_S`q'_log = . 
	* divide by the number of held-out errors actually available
	* (4,200 in the balanced sample) instead of a hard-coded constant
	quietly count if !missing(v_e_`q')
	local n_err = r(N)
	egen aux_rmse_`q' = total(v_e_`q'^2) if !missing(v_e_`q')  // sum of squared held-out errors
	replace rmse_S`q'_log = (aux_rmse_`q'/`n_err')^0.5 
	
	replace v_e_`q' = .  // reset the holder for the next specification
}

quietly drop aux_rmse_* err_S*

	************************ compare RMSE *****************************
sum rmse_S*



    ** Results for ihs transformed data **
	
	
* Leave-one-country-out cross-validation, Fan (S) matrices, ihs data.
* For each held-out country i and direction q: fit the model on all other
* countries, predict for every observation, and copy the prediction error
* of country i into v_e_q.
forvalues i = 1(1)7 {	// loop over countries 
	forvalues q = 1(1)8 { // loop over directions 
	
		reghdfe  ihs_csd    ihs_L3_sd 	ihs_S`q'_L3_sd								if cell <= 50 & year == 2014 & ccode != `i', absorb(cell ccode month) vce(cluster cell) //  excl. country i
		* the stored name does not depend on i, so only the fit of the last held-out country is kept
		estimates store ihs_S`q'
		* NOTE(review): according to the reghdfe help, -xb- leaves out the absorbed
		* fixed effects, so the error below still contains them - confirm intended.
		predict ihs_S`q'_`i', xb
		gen err_S`q'_`i' =  ihs_csd - ihs_S`q'_`i' 										if cell <= 50 & year == 2014  // note: residuals option in reghdfe is faster, but does not predict out-of-sample 
		replace v_e_`q' = err_S`q'_`i' if ccode == `i' // keep only the held-out country's errors 
		/* check predictions 
		sum err_S`q'_`i' if ccode == 1 // 600
		sum err_S`q'_`i' // 4,200 
		*/ 
	}
}

	** Calculate RMSE **
	
forvalues q = 1(1)8 { 
	gen rmse_S`q'_ihs = . 
	* divide by the number of held-out errors actually available
	* (4,200 in the balanced sample) instead of a hard-coded constant
	quietly count if !missing(v_e_`q')
	local n_err = r(N)
	egen aux_rmse_`q' = total(v_e_`q'^2) if !missing(v_e_`q')  // sum of squared held-out errors
	replace rmse_S`q'_ihs = (aux_rmse_`q'/`n_err')^0.5 
	
	replace v_e_`q' = .  // reset the holder for the next specification
}

quietly drop aux_rmse_* err_S*	
	
	
sum rmse_S*
	

		************************
		*** 4 corners (C) ******
		************************
		
    ** Results for log transformed data **
	
* Leave-one-country-out cross-validation, 4 corners (C) matrices, log(1+x) data.
* For each held-out country i and direction q: fit the model on all other
* countries, predict for every observation, and copy the prediction error
* of country i into v_e_q.
forvalues i = 1(1)7 {	// loop over countries 
	forvalues q = 1(1)8 { // loop over directions 
	
		reghdfe  log_csd    log_L3_sd 	log_C`q'_L3_sd								if cell <= 50 & year == 2014 & ccode != `i', absorb(cell ccode month) vce(cluster cell) //  excl. country i
		* the stored name does not depend on i, so only the fit of the last held-out country is kept
		estimates store log_C`q'
		* NOTE(review): according to the reghdfe help, -xb- leaves out the absorbed
		* fixed effects, so the error below still contains them - confirm intended.
		predict log_C`q'_`i', xb
		gen err_C`q'_`i' =  log_csd - log_C`q'_`i' 										if cell <= 50 & year == 2014  // note: residuals option in reghdfe is faster, but does not predict out-of-sample 
		replace v_e_`q' = err_C`q'_`i' if ccode == `i' // keep only the held-out country's errors 
		/* check predictions 
		sum err_C`q'_`i' if ccode == 1 // 600
		sum err_C`q'_`i' // 4,200 
		*/ 
	}
}

	** Calculate RMSE **
	
forvalues q = 1(1)8 { 
	gen rmse_C`q'_log = . 
	* divide by the number of held-out errors actually available
	* (4,200 in the balanced sample) instead of a hard-coded constant
	quietly count if !missing(v_e_`q')
	local n_err = r(N)
	egen aux_rmse_`q' = total(v_e_`q'^2) if !missing(v_e_`q')  // sum of squared held-out errors
	replace rmse_C`q'_log = (aux_rmse_`q'/`n_err')^0.5 
	
	replace v_e_`q' = .  // reset the holder for the next specification
}

quietly drop aux_rmse_* err_C*

	************************ compare RMSE *****************************
sum rmse_C*



    ** Results for ihs transformed data **
	
	
* Leave-one-country-out cross-validation, 4 corners (C) matrices, ihs data.
* For each held-out country i and direction q: fit the model on all other
* countries, predict for every observation, and copy the prediction error
* of country i into v_e_q.
forvalues i = 1(1)7 {	// loop over countries 
	forvalues q = 1(1)8 { // loop over directions 
	
		reghdfe  ihs_csd    ihs_L3_sd 	ihs_C`q'_L3_sd								if cell <= 50 & year == 2014 & ccode != `i', absorb(cell ccode month) vce(cluster cell) //  excl. country i
		* the stored name does not depend on i, so only the fit of the last held-out country is kept
		estimates store ihs_C`q'
		* NOTE(review): according to the reghdfe help, -xb- leaves out the absorbed
		* fixed effects, so the error below still contains them - confirm intended.
		predict ihs_C`q'_`i', xb
		gen err_C`q'_`i' =  ihs_csd - ihs_C`q'_`i' 										if cell <= 50 & year == 2014  // note: residuals option in reghdfe is faster, but does not predict out-of-sample 
		replace v_e_`q' = err_C`q'_`i' if ccode == `i' // keep only the held-out country's errors 
		/* check predictions 
		sum err_C`q'_`i' if ccode == 1 // 600
		sum err_C`q'_`i' // 4,200 
		*/ 
	}
}

	** Calculate RMSE **
	
forvalues q = 1(1)8 { 
	gen rmse_C`q'_ihs = . 
	* divide by the number of held-out errors actually available
	* (4,200 in the balanced sample) instead of a hard-coded constant
	quietly count if !missing(v_e_`q')
	local n_err = r(N)
	egen aux_rmse_`q' = total(v_e_`q'^2) if !missing(v_e_`q')  // sum of squared held-out errors
	replace rmse_C`q'_ihs = (aux_rmse_`q'/`n_err')^0.5 
	
	replace v_e_`q' = .  // reset the holder for the next specification
}

quietly drop aux_rmse_* err_C*	
	
	
sum rmse_C*

		************************
		*** Beam (B) ***********
		************************
		
    ** Results for log transformed data **
	
* Leave-one-country-out cross-validation, Beam (B) matrices, log(1+x) data.
* For each held-out country i and direction q: fit the model on all other
* countries, predict for every observation, and copy the prediction error
* of country i into v_e_q.
forvalues i = 1(1)7 {	// loop over countries 
	forvalues q = 1(1)8 { // loop over directions 
	
		reghdfe  log_csd    log_L3_sd 	log_B`q'_L3_sd								if cell <= 50 & year == 2014 & ccode != `i', absorb(cell ccode month) vce(cluster cell) //  excl. country i
		* the stored name does not depend on i, so only the fit of the last held-out country is kept
		estimates store log_B`q'
		* NOTE(review): according to the reghdfe help, -xb- leaves out the absorbed
		* fixed effects, so the error below still contains them - confirm intended.
		predict log_B`q'_`i', xb
		gen err_B`q'_`i' =  log_csd - log_B`q'_`i' 										if cell <= 50 & year == 2014  // note: residuals option in reghdfe is faster, but does not predict out-of-sample 
		replace v_e_`q' = err_B`q'_`i' if ccode == `i' // keep only the held-out country's errors 
		/* check predictions 
		sum err_B`q'_`i' if ccode == 1 // 600
		sum err_B`q'_`i' // 4,200 
		*/ 
	}
}

	** Calculate RMSE **
	
forvalues q = 1(1)8 { 
	gen rmse_B`q'_log = . 
	* divide by the number of held-out errors actually available
	* (4,200 in the balanced sample) instead of a hard-coded constant
	quietly count if !missing(v_e_`q')
	local n_err = r(N)
	egen aux_rmse_`q' = total(v_e_`q'^2) if !missing(v_e_`q')  // sum of squared held-out errors
	replace rmse_B`q'_log = (aux_rmse_`q'/`n_err')^0.5 
	
	replace v_e_`q' = .  // reset the holder for the next specification
}

quietly drop aux_rmse_* err_B*

	************************ compare RMSE *****************************
sum rmse_B*



    ** Results for ihs transformed data **
	
	
* Leave-one-country-out cross-validation, Beam (B) matrices, ihs data.
* For each held-out country i and direction q: fit the model on all other
* countries, predict for every observation, and copy the prediction error
* of country i into v_e_q.
forvalues i = 1(1)7 {	// loop over countries 
	forvalues q = 1(1)8 { // loop over directions 
	
		reghdfe  ihs_csd    ihs_L3_sd 	ihs_B`q'_L3_sd								if cell <= 50 & year == 2014 & ccode != `i', absorb(cell ccode month) vce(cluster cell) //  excl. country i
		* the stored name does not depend on i, so only the fit of the last held-out country is kept
		estimates store ihs_B`q'
		* NOTE(review): according to the reghdfe help, -xb- leaves out the absorbed
		* fixed effects, so the error below still contains them - confirm intended.
		predict ihs_B`q'_`i', xb
		gen err_B`q'_`i' =  ihs_csd - ihs_B`q'_`i' 										if cell <= 50 & year == 2014  // note: residuals option in reghdfe is faster, but does not predict out-of-sample 
		replace v_e_`q' = err_B`q'_`i' if ccode == `i' // keep only the held-out country's errors 
		/* check predictions 
		sum err_B`q'_`i' if ccode == 1 // 600
		sum err_B`q'_`i' // 4,200 
		*/ 
	}
}

	** Calculate RMSE **
	
forvalues q = 1(1)8 { 
	gen rmse_B`q'_ihs = . 
	* divide by the number of held-out errors actually available
	* (4,200 in the balanced sample) instead of a hard-coded constant
	quietly count if !missing(v_e_`q')
	local n_err = r(N)
	egen aux_rmse_`q' = total(v_e_`q'^2) if !missing(v_e_`q')  // sum of squared held-out errors
	replace rmse_B`q'_ihs = (aux_rmse_`q'/`n_err')^0.5 
	
	replace v_e_`q' = .  // reset the holder for the next specification
}

quietly drop aux_rmse_* err_B*	
	
	
sum rmse_B*

* Close the regression log and open a second, compact log that contains
* only the cross-validated RMSE summaries.
log close

log using results_summary_s_1502_xval, replace

		********************
		** report results **
		********************

		** SETUP : cmx = 4950, matrix = fan/beam/4corners, date = monthly, FE = cell, month, ccode
		** Each rmse_* variable holds one cross-validated RMSE per direction (1-8).

		*************
		** Fan (S) **
		*************

		******* reghdfe LOG, cell / country / month fixed effects ******

summarize rmse_S*_log

		******* reghdfe IHS, cell / country / month fixed effects ******

summarize rmse_S*_ihs



		*******************
		** 4 Corners (C) **
		*******************

		******* reghdfe LOG, cell / country / month fixed effects ******

summarize rmse_C*_log

		******* reghdfe IHS, cell / country / month fixed effects ******

summarize rmse_C*_ihs



		**************
		** Beam (B) **
		**************

		******* reghdfe LOG, cell / country / month fixed effects ******

summarize rmse_B*_log

		******* reghdfe IHS, cell / country / month fixed effects ******

summarize rmse_B*_ihs


log close
			